Setup libraries

library(tidyverse)
library(terra)
library(MCMCvis)

# ggplot theme set
theme_set(theme_bw())

setup data

wd <- "/Users/elaw/Desktop/LeuphanaProject/BirdModelling/ETH_birds"
results_folder <- paste0(wd, "/Results/")  
version_folder <- "v10/"
mydate <- 20221207
samples <- readRDS(paste0(results_folder, version_folder, "BirdOccMod_dOcc_samples_farm_", mydate, "_seed101.RDS"))
data_input <- readRDS(paste0(wd, "/Data/Farm_nimbleOccData_v6_", mydate, ".RDS"))
list2env(data_input[[1]], envir = .GlobalEnv); list2env(data_input[[2]], envir = .GlobalEnv); rm(data_input)
## <environment: R_GlobalEnv>
## <environment: R_GlobalEnv>
rast_folder <- "/Users/elaw/Desktop/LeuphanaProject/ETH_SpatialData/data_10m/Baseline/"
farm_stack <- rast(paste0(rast_folder, "farm_stack_10m_Baseline.tif"))
pred_stack <- rast(paste0(results_folder, version_folder, "farm_pred_stack_10m_Baseline.tif"))

rast_vals <- values(pred_stack, na.rm=TRUE) %>% as_tibble()
site_vals <- as_tibble(Xoc); names(site_vals) <- names(rast_vals)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.

site vars vs raster vars

summarise_all(rast_vals, range) %>% t()
##                [,1]     [,2]
## elevation -2.811507 3.647551
## fldis     -2.126014 3.944279
## sidi1ha   -1.156326 3.160600
## slope     -3.155237 5.155190
## farm_type  0.000000 1.000000
summarise_all(site_vals, range) %>% t()
##                [,1]     [,2]
## elevation -1.788999 1.629391
## fldis     -1.380924 2.606278
## sidi1ha   -1.156326 2.282887
## slope     -2.704128 2.725217
## farm_type  0.000000 1.000000

Plot histograms

Site sampled variables (green) are typically a biased sub-section of the range of variables in the rasters (purple). This is not as much of an issue as in forest.

plot_hists <- function(myvar, binwidth=0.25){
  mc <- viridis::viridis(3)
  ggplot(rast_vals, aes(x={{myvar}}, y = stat(density))) +
    geom_histogram(binwidth = binwidth, fill=mc[1], alpha = 0.5) + 
    geom_histogram(data = site_vals, binwidth = binwidth, fill=mc[2], alpha = 0.5)
}

plot_hists(elevation)

plot_hists(fldis)

plot_hists(sidi1ha)

plot_hists(slope)

plot_hists(farm_type, binwidth = 0.5)

Examine betalpsi parameters from each species

# select a reasonable set of samples 
mydraw <- seq(0, dim(samples$chain1)[1], 1000)
mysamples <- list(
  chain1 = samples$chain1[mydraw, ],
  chain2 = samples$chain2[mydraw, ],
  chain3 = samples$chain3[mydraw, ]
)

# extract betas elevation fl_dis sidi1ha   slope farm_type
b0 <- MCMCsummary(mysamples, 
                  params = 'lpsi\\[\\d{1,3}\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b1 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][1]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b2 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][2]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b3 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][3]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b4 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][4]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b5 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][5]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)

tibble(
  ElevationMean = sum(b1$mean > 0), ElevationMedian = sum(b1$`50%`>0),
  FarmDistanceMean = sum(b2$mean > 0), FarmDistanceMedian = sum(b2$`50%`>0),
  Sidi1haMean = sum(b3$mean > 0), Sidi1haMedian = sum(b3$`50%`>0),
  SlopeMean = sum(b4$mean > 0), SlopeMedian = sum(b4$`50%`>0),
  FarmTypeMean = sum(b5$mean > 0), FarmTypeMedian = sum(b5$`50%`>0)
) %>% t %>% 
  as_tibble(rownames = "SummaryType") %>% 
  rename(Positive=V1) %>% 
  mutate(Negative=M-Positive)
## # A tibble: 10 × 3
##    SummaryType        Positive Negative
##    <chr>                 <int>    <dbl>
##  1 ElevationMean            23      153
##  2 ElevationMedian          23      153
##  3 FarmDistanceMean         15      161
##  4 FarmDistanceMedian       19      157
##  5 Sidi1haMean             147       29
##  6 Sidi1haMedian           143       33
##  7 SlopeMean                37      139
##  8 SlopeMedian              42      134
##  9 FarmTypeMean              0      176
## 10 FarmTypeMedian            1      175

Plot the betalpsi params

plotbeta <- function(bp, bname, fgroup=NULL, fgroupName="All species"){
  bp <- bind_cols(bp, 
                  fspp=c(fspp, rep("unknown", Nadd)), 
                  mig=c(mig, rep("unknown", Nadd)),
                  fnDiet=c(fnDiet, rep("unknown", Nadd)),
                  invDiet=c(invDiet, rep("unknown", Nadd)))
  ggplot(arrange(bp,`50%`), 
         aes(x=`50%`, xmin=`2.5%`, xmax=`97.5%`,
             y=1:M))+
    geom_linerange(aes(color={{fgroup}}))+
    geom_point(aes(x=mean), color="darkgrey")+
    geom_point(aes(color={{fgroup}}))+
    geom_vline(xintercept=0, color="red")+
    scale_color_viridis_d()+
    labs(x=paste0(bname," beta parameter:\n95% HDI (lines), mean (grey points), median (color points)"),
         y="Species, sorted by median beta",
         color = fgroupName)+
    theme(axis.text.y=element_blank(),
          axis.ticks.y=element_blank())
}

plotbeta(b1, "Elevation", fspp, "Forest species")

plotbeta(b2, "Farm distance", fspp, "Forest species")

plotbeta(b3, "Landscape diversity (sidi1ha)", fspp, "Forest species")

plotbeta(b4, "Slope", fspp, "Forest species")

plotbeta(b5, "Farm type (new=0)", fspp, "Forest species")

plotbeta(b1, "Elevation", mig, "Migratory species")

plotbeta(b2, "Farm distance", mig, "Migratory species")

plotbeta(b3, "Landscape diversity (sidi1ha)", mig, "Migratory species")

plotbeta(b4, "Slope", mig, "Migratory species")

plotbeta(b5, "Farm type (new=0)", mig, "Migratory species")

Plot beta relationships - more common species in darker colours (purple), rare species in lighter colours (yellow)

newdata <- tibble(
  elevation = seq(min(rast_vals$elevation), max(rast_vals$elevation), length.out=100) %>% rep(2),
  mElevation = mean(rast_vals$elevation),
  fldis = seq(min(rast_vals$fldis), max(rast_vals$fldis), length.out=100) %>% rep(2),
  mFldis = mean(rast_vals$fldis),
  sidi1ha = seq(min(rast_vals$sidi1ha), max(rast_vals$sidi1ha), length.out=100) %>% rep(2),
  mSidi1ha = mean(rast_vals$sidi1ha),
  slope = seq(min(rast_vals$slope), max(rast_vals$slope), length.out=100) %>% rep(2),
  mSlope= mean(rast_vals$slope),
  farm_type = rep(0:1, each = 100)
)

elev_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$elevation + 
    b2$mean[k] * newdata$mFldis +
    b3$mean[k] * newdata$mSidi1ha +
    b4$mean[k] * newdata$mSlope +
    b5$mean[k] * newdata$farm_type
  
  elev_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

fldis_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$mElevation + 
    b2$mean[k] * newdata$fldis +
    b3$mean[k] * newdata$mSidi1ha +
    b4$mean[k] * newdata$mSlope +
    b5$mean[k] * newdata$farm_type
  
  fldis_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

sidi_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$mElevation + 
    b2$mean[k] * newdata$mFldis +
    b3$mean[k] * newdata$sidi1ha +
    b4$mean[k] * newdata$mSlope +
    b5$mean[k] * newdata$farm_type
  
  sidi_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

slope_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$mElevation + 
    b2$mean[k] * newdata$mFldis +
    b3$mean[k] * newdata$mSidi1ha +
    b4$mean[k] * newdata$slope +
    b5$mean[k] * newdata$farm_type
  
  slope_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

## compile into one table
out_psi <- elev_psi %>% 
  as_tibble() %>% 
  bind_cols(newdata) %>% 
  pivot_longer(cols = V1:V176, names_to = "species", values_to = "elev_psi") %>% 
  mutate(species = str_replace(species, "V", "sp"),
         species = factor(species, levels = paste0('sp',1:176))) %>% 
  bind_cols(
    fldis_psi %>% 
      as_tibble() %>% 
      pivot_longer(cols = V1:V176, names_to = "species", values_to = "fldis_psi") %>% 
      select(-species)
  ) %>% 
    bind_cols(
    sidi_psi %>% 
      as_tibble() %>% 
      pivot_longer(cols = V1:V176, names_to = "species", values_to = "sidi_psi") %>% 
      select(-species)
  ) %>% 
    bind_cols(
    slope_psi %>% 
      as_tibble() %>% 
      pivot_longer(cols = V1:V176, names_to = "species", values_to = "slope_psi") %>% 
      select(-species)
  ) %>% 
  add_column(
    b1 = rep(b1$mean, 200),
    b2 = rep(b2$mean, 200),
    b3 = rep(b3$mean, 200),
    b4 = rep(b4$mean, 200),
    fspp = rep(c(fspp, rep(FALSE, Nadd)),200), 
    fnDiet=rep(c(fnDiet, rep(FALSE, Nadd)),200), 
    invDiet=rep(c(invDiet, rep(FALSE, Nadd)),200),
    observed=rep(c(rep("observed", Nobs), rep("absent", Nadd)), 200)
  ) %>% 
  mutate(fspp = if_else(fspp==TRUE, "fspp", "other"),
         fnDiet = if_else(fnDiet==TRUE, "fnDiet", "other"),
         invDiet = if_else(invDiet==TRUE, "invDiet", "other"))

plotpsi <- function(xvar, yvar, xname, yname, oxmin=-Inf, oxmax=Inf){
  ggplot(out_psi, 
         aes(x={{xvar}}, y={{yvar}}, color=species)) +
    # add rectangles showing non-sampled projection area
    annotate("rect", xmin = -Inf, xmax = oxmin, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    annotate("rect", xmin = oxmax, xmax = Inf, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    geom_line(alpha=0.5) + 
    scale_color_viridis_d()+
    theme(legend.position = "none") +
    facet_grid(.~farm_type, labeller =label_both)+
    labs(x=xname, y=yname)
}

plotpsi(elevation, elev_psi, 
        "Elevation (center scaled, other vars at mean)", "psi", 
        min(site_vals$elevation), max(site_vals$elevation))

plotpsi(fldis, fldis_psi, 
        "Farm distance (center scaled, other vars at mean)", "psi", 
        min(site_vals$fldis), max(site_vals$fldis))

plotpsi(sidi1ha, sidi_psi, 
        "Landscape diversity (sidi1ha center scaled, other vars at mean)", "psi", 
        min(site_vals$sidi1ha), max(site_vals$sidi1ha))

plotpsi(slope, slope_psi, 
        "Slope (center scaled, other vars at mean)", "psi", 
        min(site_vals$slope), max(site_vals$slope))

Sum species over the parameters

plotSR <- function(xvar, y, xname, yname, oxmin=-Inf, oxmax=Inf){
  bind_cols(
    newdata,
    SR = rowSums(y),
    SRfspp = rowSums(y[,fspp]),
    SRmig = rowSums(y[,mig]),
    SRfnDiet = rowSums(y[,fnDiet]),
    SRinvDiet = rowSums(y[,invDiet])
  ) %>% 
    pivot_longer(cols = SR:SRinvDiet, names_to = "SpeciesSelection", values_to = "SR") %>% 
  ggplot(., 
         aes(x={{xvar}}, y=SR, color=SpeciesSelection), lwd=2) +
    # add rectangles showing non-sampled projection area
    annotate("rect", xmin = -Inf, xmax = oxmin, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    annotate("rect", xmin = oxmax, xmax = Inf, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    geom_line() + 
    facet_grid(.~farm_type, labeller =label_both)+
    scale_color_viridis_d()+
    labs(x=xname, y=yname)
}

plotSR(elevation, elev_psi, 
        "Elevation (center scaled, other vars at mean)","Species Richness (sum PSI)", 
        min(site_vals$elevation), max(site_vals$elevation))

plotSR(fldis, fldis_psi, 
        "Farm distance (center scaled, other vars at mean)", "Species Richness (sum PSI)", 
        min(site_vals$fldis), max(site_vals$fldis))

plotSR(sidi1ha, sidi_psi, 
        "Landscape diversity (sidi1ha center scaled, other vars at mean)", "Species Richness (sum PSI)", 
        min(site_vals$sidi1ha), max(site_vals$sidi1ha))

plotSR(slope, slope_psi, 
        "Slope (center scaled, other vars at mean)", "Species Richness (sum PSI)", 
        min(site_vals$slope), max(site_vals$slope))

Probability of detection

MCMCsummary(mysamples, 
                  params = "betalp",  
                  round = 2)
##            mean   sd  2.5%   50% 97.5% Rhat n.eff
## betalp[1]  0.13 0.03  0.08  0.13  0.17 1.09    12
## betalp[2]  0.06 0.02  0.01  0.05  0.08 0.78    30
## betalp[3] -0.21 0.04 -0.28 -0.21 -0.17 1.03    24
## betalp[4] -0.09 0.09 -0.21 -0.12  0.08 0.95    12
# extract betas for detection
d0 <- MCMCsummary(mysamples, 
                  params = "lp",  
                  round = 2)
d1 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[1]\\]', ISB = FALSE, exact = FALSE, 
                  round = 2)
d2 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[2]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
d3 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[3]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
d4 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[4]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)

logitp_v1 <- matrix(d0$mean, length(d0$mean), dim(Xob)[1])  +
  d1$mean * Xob[,1,1] + 
  d2$mean * Xob[,1,2] +
  d3$mean * Xob[,1,3] +
  d4$mean * Xob[,1,4] 
logitp_v2 <- matrix(d0$mean, length(d0$mean), dim(Xob)[1])  +
  d1$mean * Xob[,2,1] + 
  d2$mean * Xob[,2,2] +
  d3$mean * Xob[,2,3] +
  d4$mean * Xob[,1,4] 

p_v1 <- exp(logitp_v1)/(exp(logitp_v1) + 1)
p_v2 <- exp(logitp_v2)/(exp(logitp_v2) + 1) 

# mean probability of detection, all species, all sites, on visit 1 and visit 2
mean(p_v1); mean(p_v2)
## [1] 0.05251012
## [1] 0.06659386
# plot probability of detection
tibble(
  mean_p_v1 = rowMeans(p_v1),
  mean_p_v2 = rowMeans(p_v2)
) %>% 
  arrange(-mean_p_v2, -mean_p_v1) %>% 
  add_column(species = 1:M) %>% 
  ggplot(., aes(x=species, ymin=mean_p_v1, ymax=mean_p_v2)) +
  geom_hline(yintercept=mean(p_v1), color="red", lty="dotted") +
  geom_hline(yintercept=mean(p_v2), color="red") +
  geom_linerange() +
  geom_point(aes(y=mean_p_v1), pch=1) +
  geom_point(aes(y=mean_p_v2), color="black") +
  labs(x="Species, sorted by probability of detection (visit 2)", 
       y="Mean probability of detection over all sites\nOpen circles = visit 1; Closed circles = visit 2")